Setup

Notes

  • Cell suspensions of meningeal dura enriched by MACS for cells expressing Cd45, Cd31, and Lyve-1
    • Cd45/Ptprc/B220/Ly-5/Lyt-4/T200: transmembrane protein tyrosine phosphatase, located on most haematopoietic cells, positive selection of leukocytes
    • Cd31/Pecam1: adhesion and signaling receptor that is expressed on endothelial and hematopoietic cells, positive selection of endothelial cells
    • Lyve1/1200012G08Rik/Lyve-1/Xlkd1/lymphatic vessel endothelial HA receptor-1: found primarily on lymphatic endothelial cells

Libraries

library(dplyr)          # ungroup()
library(ggtree)         # BuildClusterTree()
library(gridExtra)      # grid.arrange()
library(gtools)         # smartbind()
library(parallel)       # detectCores()
library(plotly)         # plot_ly()
library(Seurat)         # Idents()
library(SeuratWrappers) # RunPrestoAll()
library(ShinyCell)      # createConfig()
library(tidyr)          # %>%

Variables

out <- "../../results/pass_1/all_clusters/"
sample_order <- c("E3.M","E3.F","E4.M","E4.F")
sample_colors <- c("#26946A","#1814A1","#EAC941","#DF5F00")
sample_order2 <- c("Male E3", "Male E4", "Female E3", "Female E4")
isoform_order <- c("E4","E3")
isoform_colors <- c("darkgray","cornflowerblue")
sex_order <- c("Male","Female")
sex_colors <- c("darkgray","purple")

Read data

mouse.unannotated <- readRDS("../../rObjects/unannotated_obj.rds")
Idents(mouse.unannotated) <- "seurat_clusters"
DefaultAssay(mouse.unannotated) <- "RNA"
mouse.unannotated <- NormalizeData(mouse.unannotated)
## Normalizing layer: counts
mouse.unannotated
## An object of class Seurat 
## 41866 features across 16965 samples within 2 assays 
## Active assay: RNA (21089 features, 0 variable features)
##  2 layers present: data, counts
##  1 other assay present: SCT
##  3 dimensional reductions calculated: pca, harmony, umap

Unannotated QC

UMAP

cluster_colors <- c("red","orange","gold","lightgreen","chartreuse3","darkgreen",
                    "cyan","steelblue","blue","deeppink","pink","purple","chocolate4",
                    "gray","black")
u1 <- DimPlot(object = mouse.unannotated,
              reduction = "umap",
              shuffle = TRUE,
              repel = TRUE,
              raster = FALSE,
              cols = cluster_colors,
              label = TRUE)
u1

u2 <- DimPlot(object = mouse.unannotated,
              reduction = "umap",
              shuffle = TRUE,
              repel = TRUE,
              dims = c(2,3),
              cols = cluster_colors,
              label = TRUE)
u2

Feature plots

# UMAP percent.mt
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "percent.mt") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP percent.ribo
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "percent.ribo.protein") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP percent.hb
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "percent.hb") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP nCount
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "nCount_RNA") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP nFeature
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "nFeature_RNA") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP cell.complexity
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "cell.complexity") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

Violins

VlnPlot(mouse.unannotated,
        features = "nCount_RNA",
        split.by = "seurat_clusters")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(mouse.unannotated,
        features = "nFeature_RNA",
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "cell.complexity",
        split.by = "seurat_clusters")

Cells per cluster

# Cells per sample per cluster
sample_ncells <- FetchData(mouse.unannotated, 
                     vars = c("ident", "sample")) %>%
  dplyr::count(ident,sample) %>%
  tidyr::spread(ident, n)
sample_ncells
##   sample    0   1   2   3   4   5   6   7   8   9  10  11  12 13 14
## 1   E3.M  748 690 232 307 296 216 408 285 282 118  84 105  49 94 58
## 2   E3.F  907 328 306 176 299 218  34 207 285 178  65  31  21 58 42
## 3   E4.M  606 654 244 352 304 253 393 254 304 164  63  66  55 48 61
## 4   E4.F 1201 940 606 488 394 573 382 421 109 295 155 142 182 70 59
# Cells per isoform per cluster
isoform_ncells <- FetchData(mouse.unannotated, 
                     vars = c("ident", "isoform")) %>%
  dplyr::count(ident,isoform) %>%
  tidyr::spread(ident, n)
isoform_ncells
##   isoform    0    1   2   3   4   5   6   7   8   9  10  11  12  13  14
## 1      E4 1807 1594 850 840 698 826 775 675 413 459 218 208 237 118 120
## 2      E3 1655 1018 538 483 595 434 442 492 567 296 149 136  70 152 100
# Cells per sex per cluster
sex_ncells <- FetchData(mouse.unannotated, 
                     vars = c("ident", "sex")) %>%
  dplyr::count(ident,sex) %>%
  tidyr::spread(ident, n)
sex_ncells
##      sex    0    1   2   3   4   5   6   7   8   9  10  11  12  13  14
## 1   Male 1354 1344 476 659 600 469 801 539 586 282 147 171 104 142 119
## 2 Female 2108 1268 912 664 693 791 416 628 394 473 220 173 203 128 101

Gene histogram

# User params
goi <- "Malat1"
threshold <- 1

# Subset data
log2.threshold <- log2(threshold + 0.01)
counts.df <- FetchData(mouse.unannotated, vars = goi)
colnames(counts.df) <- "counts"
log2.counts.df <- log2(counts.df + 0.01)

# Histogram
title <- paste0(goi, "\nnCount_RNA > ", threshold)
hist1 <- ggplot(counts.df, aes(x = counts)) + 
  geom_histogram(bins = 100, fill = "gray", color = "black") + 
  labs(title = title, x=NULL, y=NULL) +
  xlab(paste0(goi, " nCount_RNA")) + ylab("# of Samples") + theme_bw() +
  geom_vline(xintercept = threshold, col = "blue") +
  annotate("rect", 
           xmin = -Inf,
           xmax = threshold, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="chocolate4") +
  annotate("rect", 
           xmin = threshold,
           xmax = Inf, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="deepskyblue")

# Histogram log transformed
hist2 <- ggplot(log2.counts.df, aes(x = counts)) + 
  geom_histogram(bins = 100, fill = "gray", color = "black") + 
  labs(title = title, x=NULL, y=NULL) +
  xlab(paste0("Log2(",goi, " nCount_RNA)")) + ylab("# of Samples") + theme_bw() +
  geom_vline(xintercept = log2.threshold, col = "blue") +
  annotate("rect", 
           xmin = -Inf,
           xmax = log2.threshold, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="chocolate4") +
  annotate("rect", 
           xmin = log2.threshold,
           xmax = Inf, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="deepskyblue")

# plot
plots1 <- list(hist1,hist2)
layout1 <- rbind(c(1),c(2))
grid1 <- grid.arrange(grobs = plots1, layout_matrix = layout1)

Percent gene

# user define variable
goi <- "Malat1"

# Extract counts data
DefaultAssay(mouse.unannotated) <- "RNA"
Idents(mouse.unannotated) <- "seurat_clusters"
geneData <- FetchData(mouse.unannotated,
                      vars = goi,
                      slot = "counts")
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
geneData <- geneData > 1
table(geneData)
## geneData
## FALSE  TRUE 
##  1248 15717
mouse.unannotated$Expression <- geneData

FetchData(mouse.unannotated,
          vars = c("ident", "Expression")) %>%
  dplyr::count(ident, Expression) %>%
  tidyr::spread(ident, n)
##   Expression    0    1    2    3    4   5    6    7   8   9  10  11  12  13  14
## 1      FALSE    5  124    4   NA    5 755    1   43   9  NA 285  13   2   1   1
## 2       TRUE 3457 2488 1384 1323 1288 505 1216 1124 971 755  82 331 305 269 219
# Plot
mouse.unannotated@meta.data %>%
  group_by(seurat_clusters, Expression) %>%
  dplyr::count() %>%
  group_by(seurat_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=seurat_clusters,y=percent, fill=Expression)) +
  geom_col() +
  ggtitle(paste0("Percentage of cells with > 1 counts for ", goi)) +
  theme(axis.text.x = element_text(angle = 90))

Cluster tree

  • Cluster trees are helpful in deciding what clusters to merge.
mouse.unannotated <- BuildClusterTree(object = mouse.unannotated,
                                     dims = 1:15,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)
tree <- mouse.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)

ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = cluster_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

Potential Markers

MACS markers

  • Ptprc - located on most haematopoietic cells, positive selection of leukocytes
  • Pecam1 - adhesion and signaling receptor that is expressed on endothelial and hematopoietic cells, positive selection of endothelial cells
  • Lyve1 - found primarily on lymphatic endothelial cells
VlnPlot(mouse.unannotated,
        features = "Ptprc",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Lyve1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Pecam1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

B-cells / Plasma cells

  • Cd19: expressed in B-cells and follicular dendritic cells
  • Fcrla: a B-cell specific protein in mice
  • Cd79a & Cd79b: together form BCR complex
  • Sdc1: Plasma cells
VlnPlot(mouse.unannotated,
        features = "Cd19",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Fcrla",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Cd79a",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Cd79b",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Sdc1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

T-cells

  • Trac/Cd3d/Cd3e/Cd3g are components of the TCR
VlnPlot(mouse.unannotated,
        features = "Trac",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Cd3e",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Cd8a",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Cd4",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Endothelial cells

VlnPlot(mouse.unannotated,
        features = "Ly6c1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Flt1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Fibroblasts

VlnPlot(mouse.unannotated,
        features = "Col1a1",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Col1a2",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Mast cells

VlnPlot(mouse.unannotated,
        features = "Fcer1a",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Kit", # aka Cd117
        cols = cluster_colors,
        split.by = "seurat_clusters")

Macrophage

VlnPlot(mouse.unannotated,
        features = "C1qa",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "C1qb",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Monocytes/DCs

  • Cd11c/Itgax: Monocytes & DCs
VlnPlot(mouse.unannotated,
        features = c("Itgax","Cd209a","Flt3","Zbtb46","Ccr2"),
        cols = cluster_colors,
        split.by = "seurat_clusters",
        stack = TRUE,
        flip = TRUE)

Neutrophils

VlnPlot(mouse.unannotated,
        features = "Ly6g",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Retnlg",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Pericytes & SMCs

VlnPlot(mouse.unannotated,
        features = "Acta2",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Myl9",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Rgs5",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Schwann cells

VlnPlot(mouse.unannotated,
        features = "Cdh19",
        cols = cluster_colors,
        split.by = "seurat_clusters")

VlnPlot(mouse.unannotated,
        features = "Mpz",
        cols = cluster_colors,
        split.by = "seurat_clusters")

Sandro’s markers

Idents(mouse.unannotated) <- "seurat_clusters"
goi <- c("Pdgfrb","Acta2","Vwf","Cldn5","Pecam1","Flt4","Prox1","Lyve1","Ptprc",
         "Cd19","Ms4a1","Ighd","Cd3e","Trbc2","Il7r","Nkg7","Klrb1b","Klrb1c",
         "Gata3","Rora","Itgax","H2-Eb1","Ccr2","Ly6c2","Lyz2","Ly6g","Itgam",
         "Mrc1","Csf1r","Cd38","Mki67","Mcpt4","Ms4a2","Col1a2","Plp1")

v1 <- VlnPlot(mouse.unannotated,
              features = goi,
              cols = cluster_colors,
              split.by = "seurat_clusters",
              flip = TRUE,
              stack = TRUE)
v1

pdf(paste0(out, "markers/sandro_markers_violin_unannotated.pdf"), width = 8, height = 8)
v1
dev.off()
## png 
##   2

Automatically detect markers

# auto find markers
Idents(mouse.unannotated) <- "seurat_clusters"
all.markers <- SeuratWrappers::RunPrestoAll(
  object = mouse.unannotated,
  assay = "RNA",
  slot = "counts",
  only.pos = TRUE
)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14
# filter based on p_val_adj
all.markers <- all.markers[all.markers$p_val_adj < 0.01,]

# subset table to top 2 and top 20 markers
top2 <- Reduce(rbind,
               by(all.markers,
                  all.markers["cluster"],
                  head,
                  n = 2))
top20 <- Reduce(rbind,
               by(all.markers,
                  all.markers["cluster"],
                  head,
                  n = 20))

# plot violin
v1 <- VlnPlot(mouse.unannotated,
        features = top2$gene,
        split.by = "seurat_clusters",
        flip = TRUE,
        stack = TRUE,
        cols = cluster_colors)
v1

# save table
write.table(all.markers, 
            paste0(out, "markers/unannotated_auto_find_markers_adjpval_0.01.tsv"),
            quote = FALSE,
            row.names = FALSE)
# compare 
table(all.markers$cluster)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 3273 2058 3427 3214 3808 1360 2879 1587 1304 1335  570 2207  867  649 1143
# subset based on cluster
cluster0 <- all.markers[all.markers$cluster == 0,]
cluster1 <- all.markers[all.markers$cluster == 1,]
cluster2 <- all.markers[all.markers$cluster == 2,]
cluster3 <- all.markers[all.markers$cluster == 3,]
cluster4 <- all.markers[all.markers$cluster == 4,]
cluster5 <- all.markers[all.markers$cluster == 5,]
cluster6 <- all.markers[all.markers$cluster == 6,]
cluster7 <- all.markers[all.markers$cluster == 7,]
cluster8 <- all.markers[all.markers$cluster == 8,]
cluster9 <- all.markers[all.markers$cluster == 9,]
cluster10 <- all.markers[all.markers$cluster == 10,]
cluster11 <- all.markers[all.markers$cluster == 11,]
cluster12 <- all.markers[all.markers$cluster == 12,]
cluster13 <- all.markers[all.markers$cluster == 13,]
cluster14 <- all.markers[all.markers$cluster == 14,]

Cluster Annotations

Cluster 0: Macrophages

VlnPlot(mouse.unannotated,
        features = cluster0$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 1: Endothelail

VlnPlot(mouse.unannotated,
        features = cluster1$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 2: Fibroblasts

VlnPlot(mouse.unannotated,
        features = cluster2$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 3: T and NK cells

VlnPlot(mouse.unannotated,
        features = cluster3$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 4: DCs

VlnPlot(mouse.unannotated,
        features = cluster4$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 5: Macrophages

VlnPlot(mouse.unannotated,
        features = cluster5$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 6: B cells

VlnPlot(mouse.unannotated,
        features = cluster6$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 7: Endothelial

VlnPlot(mouse.unannotated,
        features = cluster7$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 8: Neutrophils

VlnPlot(mouse.unannotated,
        features = cluster8$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 9: ILCs

VlnPlot(mouse.unannotated,
        features = cluster9$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 10: Fibroblasts

VlnPlot(mouse.unannotated,
        features = cluster10$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 11: Pericytes and SMCs

VlnPlot(mouse.unannotated,
        features = cluster11$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 12: Schwann cells

VlnPlot(mouse.unannotated,
        features = cluster12$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 13: Mast cells

VlnPlot(mouse.unannotated,
        features = cluster13$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Cluster 14: Plasma cells

VlnPlot(mouse.unannotated,
        features = cluster14$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "seurat_clusters")

Assign Identities

# Rename all identities
mouse.annotated <- RenameIdents(object = mouse.unannotated,
                                "0" = "Macrophages",         # Csf1r,C1qb
                                "1" = "Endothelial",         # Flt1,Ptprb
                                "2" = "Fibroblasts",         # Col1a1       
                                "3" = "T and NK cells",      # Skap1      
                                "4" = "Dendritic cells",     # Ccr2,Itgax  
                                "5" = "Macrophages",         # C1qb
                                "6" = "B cells",             # Pax5
                                "7" = "Endothelial",         # Vwf,Ptprb
                                "8" = "Neutrophils",         # S100a9,Hp
                                "9" = "ILCs",                # Gata3  
                                "10" = "Fibroblasts",        # Col1a1 
                                "11" = "Pericytes and SMCs", # Myh11,Notch3 
                                "12" = "Schwann cells",      # Mpz
                                "13" = "Mast cells",         # Kit
                                "14" = "Plasma cells")       # Sdc1

# save idents
mouse.annotated$annotated_clusters <- Idents(mouse.annotated)

# set levels
mouse.annotated$annotated_clusters <- factor(mouse.annotated$annotated_clusters,
                                             levels = c("Pericytes and SMCs",
                                                        "Endothelial",
                                                        "B cells",
                                                        "Plasma cells",
                                                        "T and NK cells",
                                                        "ILCs",
                                                        "Dendritic cells",
                                                        "Neutrophils",
                                                        "Macrophages",
                                                        "Mast cells",
                                                        "Fibroblasts",
                                                        "Schwann cells"))

# set ident
Idents(mouse.annotated) <- "annotated_clusters"

Annotated UMAP

# set params
DefaultAssay(mouse.annotated) <- "RNA"
mouse.annotated <- NormalizeData(mouse.annotated)
## Normalizing layer: counts
cluster_colors <- c("#B5B9BA", # Pericytes and SMCs
                    "#40BBFF", # Endothelial
                    "#a5d5a9", # B cells
                    "#5dbd64", # Plasma cells
                    "#1c7e24", # T and NK cells
                    "#F57C7C", # ILCs
                    "#E42622", # Dendritic cells
                    "#FBB268", # Neutrophils
                    "#FE8D19", # Macrophages
                    "#A6CEE3", # Mast cells
                    "#9D7BBA", # Fibroblasts
                    "#977899") # Schwann cells

# umap
umap1 <- DimPlot(object = mouse.annotated, 
                reduction = "umap",
                repel = TRUE,
                group.by = "annotated_clusters",
                cols = cluster_colors)
umap1

umap2 <- DimPlot(object = mouse.annotated, 
                reduction = "umap",
                repel = TRUE,
                dims = c(2,3),
                group.by = "annotated_clusters",
                cols = cluster_colors)
umap2

Cluster tree

# build tree
mouse.annotated <- BuildClusterTree(object = mouse.annotated,
                                     dims = 1:15,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)
tree <- mouse.annotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)

# plot tree
ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = cluster_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

Find markers

Automatically detect markers

# auto find markers
Idents(mouse.annotated) <- "annotated_clusters"
all.markers <- SeuratWrappers::RunPrestoAll(
  object = mouse.annotated,
  assay = "RNA",
  slot = "counts",
  only.pos = TRUE
)
## Calculating cluster Pericytes and SMCs
## Calculating cluster Endothelial
## Calculating cluster B cells
## Calculating cluster Plasma cells
## Calculating cluster T and NK cells
## Calculating cluster ILCs
## Calculating cluster Dendritic cells
## Calculating cluster Neutrophils
## Calculating cluster Macrophages
## Calculating cluster Mast cells
## Calculating cluster Fibroblasts
## Calculating cluster Schwann cells
# filter based on p_val_adj
all.markers <- all.markers[all.markers$p_val_adj < 0.01,]

# subset table to top 2 and top 20
top2 <- Reduce(rbind,
               by(all.markers,
                  all.markers["cluster"],
                  head,
                  n = 2))
top20 <- Reduce(rbind,
               by(all.markers,
                  all.markers["cluster"],
                  head,
                  n = 20))

# save
write.table(all.markers, 
            paste0(out, "markers/annotated_auto_find_markers_adjpval_0.01.tsv"),
            quote = FALSE,
            row.names = FALSE)

Violin markers

v1 <- VlnPlot(mouse.annotated,
        features = top2$gene,
        split.by = "annotated_clusters",
        flip = TRUE,
        stack = TRUE,
        cols = cluster_colors)
v1

goi <- c("Pdgfrb","Acta2","Vwf","Cldn5","Pecam1","Flt4","Prox1","Lyve1","Ptprc",
         "Cd19","Ms4a1","Ighd","Cd3e","Trbc2","Il7r","Nkg7","Klrb1b","Klrb1c",
         "Gata3","Rora","Itgax","H2-Eb1","Ccr2","Ly6c2","Lyz2","Ly6g","Itgam",
         "Mrc1","Csf1r","Cd38","Mki67","Mcpt4","Ms4a2","Col1a2","Plp1")
v2 <- VlnPlot(mouse.annotated,
              group.by = "annotated_clusters",
              split.by = "annotated_clusters",
              stack = TRUE,
              flip = TRUE,
              cols = cluster_colors,
              features = goi)
v2

Pseudobulk PCA

  • Each sample will have data points equal to the number of annotated_clusters
# Step 1: Pseudo-bulk the counts based on sample and cell type
pseudo <- AggregateExpression(
    object = mouse.annotated, 
    assays = "RNA", 
    features = rownames(mouse.annotated),
    return.seurat = TRUE, 
    group.by = c("sample", "annotated_clusters")
)
## Centering and scaling data matrix
# Step 2: Normalize the data
pseudo <- NormalizeData(pseudo,
                        normalization.method = "LogNormalize",
                        scale.factor = 10000)
## Normalizing layer: counts
# Step 3: Find variable features
pseudo <- FindVariableFeatures(pseudo,
                               selection.method = "vst", 
                               nfeatures = 2000)
## Finding variable features for layer counts
# Step 4: Scale the data
pseudo <- ScaleData(pseudo, 
                    features = rownames(pseudo))
## Centering and scaling data matrix
# Step 5: Run PCA
pseudo <- RunPCA(pseudo, 
                 features = VariableFeatures(pseudo),
                 npcs = 10)
## PC_ 1 
## Positive:  Gna15, Dclre1c, Bcl2a1b, Gpr65, Rgs10, Tmem8, Fcgr2b, Ms4a6b, Ctsc, Clcn5 
##     Cfp, Cd86, Slc37a2, Gm26542, Apobec1, Zeb2os, Fcgr3, Rbpj, Wfdc17, Gpr160 
##     Csf1r, Clec10a, Nlrc4, P2ry12, Cd300c2, Trf, C1qb, Ifi213, C1qa, Cndp2 
## Negative:  Wwtr1, Fbxl7, Myo10, Me3, Synpo, Cxcl12, Sulf1, Adarb1, Plxna2, Plpp3 
##     Tspan18, Efnb1, Nfib, Smad6, Osmr, Vangl1, Amotl1, Pdgfd, Ppic, Cttnbp2 
##     Arhgap29, Zfp423, Grb10, Zfpm2, Tshz2, Cttn, Uaca, Zfp697, Rhoj, Nhsl1 
## PC_ 2 
## Positive:  Icam2, Slfn3, Cmtm8, Pecam1, Wipf3, Spns2, Rasip1, Arl15, Pkn3, Cd93 
##     Ripply3, Slc2a1, Flt4, Mcf2l, Cd300lg, Fam167b, B3gnt3, Als2cl, Kank3, Nova2 
##     Ccm2l, Myct1, Cytl1, Klhl4, Stap2, Sox7, Tie1, Exoc3l2, Lhx6, Deup1 
## Negative:  Slc38a2, Mdk, Glrb, Foxd1, Olfml3, Clip4, Fmod, Col12a1, Lum, Clmp 
##     Lmx1b, Pdgfrl, Shisa3, Frmpd4, Sfrp4, Adamts2, Slc4a10, Mfap4, Car13, Thbs3 
##     Gpx3, Cacna2d2, Egfr, Six2, Ecrg4, Pdgfra, Col1a1, Cdh11, Fbln1, Prrx2 
## PC_ 3 
## Positive:  Arhgap24, Chchd10, Chst3, Srpk3, Scd1, Cpm, Fcrla, Gm42836, Spib, 2010309G21Rik 
##     Sbk1, Gm43388, Ly6d, Cd79a, Bfsp2, Il9r, Gfra1, Cecr2, Siglecg, Agbl1 
##     Vpreb3, Pou2af1, Gm34215, Ms4a1, Klhl14, H2-Eb2, Tmem108, Tnfrsf13c, Cplx2, Fcmr 
## Negative:  Dab2, Rnase4, Rab11fip5, Gm4951, Rab3il1, Vcam1, Ophn1, Slco2b1, Stab1, Wwp1 
##     Cmklr1, Trpv4, Ang, Hpgd, Hfe, F830016B08Rik, Hsf3, Smagp, Serpinb8, Abca9 
##     Prune2, Tmem106a, Gm15635, Abcc3, Gna12, Reps2, C3ar1, 2610203C22Rik, Pla2g15, Cables1 
## PC_ 4 
## Positive:  Kcnj12, Tmod1, Ctnna3, Itga7, Il34, Sncg, Cacna2d1, Akap6, Tagln, Synpo2 
##     Nrip2, Nrxn1, Rasl12, Rgs6, Myh11, Myl9, Cap2, Dgkb, Myom1, Pln 
##     Axl, Gm34829, Tbx3os1, Jph2, Mcam, Susd5, Casq2, Rgs4, Tpm2, Ryr2 
## Negative:  Dennd3, Erg, Nxn, 2700054A10Rik, Dync1i1, Gm16046, Gm16070, Myzap, Chrm3, Tnfsf10 
##     Calcrl, Nxpe2, Dchs1, Crispld1, Gja1, Ptpru, Rgs7, Trpm6, Dkk2, Pik3c2b 
##     Eya1, Ifitm10, Ism1, Kcnj15, Adamts9, Fmo1, Sybu, Ptprq, F8, Aff3 
## PC_ 5 
## Positive:  Anxa1, Mmp8, Bmx, Sgms2, Cd177, Lcn2, B230208H11Rik, B430306N03Rik, Trem3, Abca13 
##     Scrg1, Il1rn, F5, Dhrs9, Camp, Arg2, AA467197, Acod1, Mgam, 4930438A08Rik 
##     Padi4, Slfn4, Fpr2, Clec4d, Cebpe, Slc16a3, Fpr1, Wfdc21, A530064D06Rik, Ngp 
## Negative:  Ptprcap, Gimap7, Ikzf3, Gimap3, Fam169b, Cd2, Gpr174, Ablim1, Unc5cl, Bach2 
##     Aff3, Slamf6, Myo3b, Sidt1, Rnf43, Lck, Tox, Hdac7, Ctla4, A430093F15Rik 
##     Blk, Clnk, Trbc2, Btla, Gprin3, Gm39323, Il2rb, Ctsw, Themis, Zap70
# Step 6: Visualize PCA
pca_colors <- c("firebrick1","cyan","gold","blue","black","forestgreen",
                    "darkorchid1","green","gray","deeppink","chocolate4",
                    "steelblue","pink","orange")
pca <- DimPlot(pseudo, 
        reduction = "pca",
        group.by = "annotated_clusters",
        cols = pca_colors,
        pt.size = 3)
pca

pdf(paste0(out, "clustering_QC/pseudobulk_pca.pdf"), width = 6, height = 4)
pca
dev.off()
## png 
##   2

Shiny App

# create new object
shiny.obj <- mouse.annotated
VariableFeatures(shiny.obj) <- shiny.obj@assays$SCT@var.features

# set default params
DefaultAssay(shiny.obj) <- "RNA"
Idents(shiny.obj) <- "annotated_clusters"

# create config
names <- colnames(shiny.obj@meta.data)
names <- names[c(22,23,2:21)]
sc.config <- createConfig(obj = shiny.obj,
                          meta.to.include = names)

# change wd
setwd(out)

# output shiny app folder
makeShinyApp(obj = shiny.obj, 
             scConf = sc.config, 
             gene.mapping = TRUE,
             shiny.title = "All Clusters")

# manual config edits
sc1conf <- readRDS("shinyApp/sc1conf.rds")
sc1conf[2,4] <- "#B5B9BA|#40BBFF|#a5d5a9|#5dbd64|#1c7e24|#F57C7C|#E42622|#FBB268|#FE8D19|#A6CEE3|#9D7BBA|#977899"
saveRDS(sc1conf, "shinyApp/sc1conf.rds")